require(dplyr)
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
Hmisc::getHdata(nhgh)
d1 <- nhgh %>% 
  filter(sex == "female") %>% 
  filter(age >= 18) %>% 
  select(gh, ht) %>% 
  filter(1:n()<=1000)

1 ht

1.1 MM

1.1.1 Estimates of parameters

mm.gamma.shape = mean(d1$ht)^2/var(d1$ht)
mm.gamma.scale=var(d1$ht)/mean(d1$h)

mm.norm.mean=mean(d1$ht)
mm.norm.sd=sd(d1$ht)

mean.weib = function(lambda, k){
  lambda*gamma(1 + 1/k)
}

lambda = function(samp.mean, k){
  samp.mean/gamma(1+1/k)
}

var.weib = function(samp.mean, k,samp.var){
  (lambda(samp.mean, k))^2*(gamma(1+2/k)-(gamma(1+1/k))^2)-samp.var
}

1.1.2 Overlay estimated pdf onto histogram

mm.opt = optimize(f = function(x){abs(var.weib(k = x, samp.mean = mean(d1$ht), samp.var = var(d1$ht)))},lower = 10, upper = 100)
mm.weib.k = mm.opt$minimum
mm.weib.lambda= lambda(samp.mean = mean(d1$ht), k=mm.weib.k)

hist(d1$ht, main = "Height of Adult Females: MM", breaks = 100, freq = FALSE)
curve(dgamma(x, shape = mm.gamma.shape, scale = mm.gamma.scale), add = TRUE, col = "red")
curve(dnorm(x, mm.norm.mean, mm.norm.sd), add = TRUE, col = "blue", lwd = 2)
curve(dweibull(x, shape = mm.weib.k, scale = mm.weib.lambda),add = TRUE, col = "green",lwd = 2)

  • the red line is gamma, the blue line is norm and the green line is weibull

1.1.3 Overlay estimated CDF onto eCDF

plot(ecdf(d1$ht),main = "The CDF and ECDF of ht: MM")
curve(pgamma(x, shape = mm.gamma.shape, scale = mm.gamma.scale), add = TRUE, col = "red",lwd = 6)
curve(pnorm(x, mm.norm.mean, mm.norm.sd), add = TRUE, col = "blue", lwd = 4)
curve(pweibull(x, shape = mm.weib.k, scale = mm.weib.lambda),add = TRUE, col = "green",lwd = 4)

  • the red line is gamma, the blue line is norm and the green line is weibull

1.1.4 QQ plot (sample vs estimated dist)

p = ppoints(300)
y = quantile(d1$ht, probs = p)
#gamma
x.gamma.mm = qgamma(p, shape = mm.gamma.shape, scale = mm.gamma.scale)
plot(x.gamma.mm,y,main = "Gamma QQplot")
abline(0,1)

#norm
x.norm.mm = qnorm(p, mm.norm.mean, mm.norm.sd)
plot(x.norm.mm,y,main = "Norm QQplot")
abline(0,1)

#weibull
x.pwei.mm=qweibull(p, shape = mm.weib.k, scale = mm.weib.lambda)
plot(x.pwei.mm,y,main = "Weibull QQplot")
abline(0,1)

1.1.5 Estimated Median

median(d1$ht)
## [1] 160.8
qweibull(0.5, shape = mm.weib.k, scale = mm.weib.lambda)
## [1] 161.8065
qgamma(0.5, shape = mm.gamma.shape, scale = mm.gamma.scale)
## [1] 160.6308
qnorm(0.5, mm.norm.mean, mm.norm.sd)
## [1] 160.7419
  • we could know that the median of ht is 160.8 , and the norm median is 160.7419, the gamma median is 160.630794, and the qweibull median is 161.8065244

1.1.6 Median Samp Dist (hist)

#gamma
med.gam = NA
for (i in 1:1000) {
  data = rgamma(200, shape = mm.gamma.shape, scale = mm.gamma.scale)
  med.gam[i] = median(data)
}
hist(med.gam,breaks = 100,main = "Histogram of median of gamma distribution")

#norm
med.norm = NA
for (i in 1:1000) {
  data = rnorm(200, mm.norm.mean, mm.norm.sd)
  med.norm[i] = median(data)
}
hist(med.norm,breaks = 100,main = "Histogram of median of normal distribution")

#
med.wei = NA
for (i in 1:1000) {
  data = rweibull(200, shape = mm.weib.k, scale = mm.weib.lambda)
  med.wei[i] = median(data)
}
hist(med.wei,breaks = 100,main = "Histogram of median of weibull distribution")

1.1.7 Range of middle 95% of Samp Dist

diff(quantile(med.gam,probs = c(0.025,0.975)))
##    97.5% 
## 2.455218
diff(quantile(med.norm,probs = c(0.025,0.975)))
##    97.5% 
## 2.418323
diff(quantile(med.wei,probs = c(0.025,0.975)))
##    97.5% 
## 2.448809
  • Range of middle 95% of Samp Dist of gamma is 2.4552184, norm is 2.4183234, and the weibull is 2.4488091

1.2 MLE

1.2.1 Estimates of parameters

1.2.1.1 Normal distribution

library(stats4)
nLL <- function(mean, sd){
  fs <- dnorm(
    x = d1$ht
    ,mean = mean
    ,sd = sd
    ,log = TRUE
  )
  -sum(fs)
}

fit <- mle(
  nLL
  ,start = list(mean = 160, sd = 5)
  ,method = "L-BFGS-B"
  ,lower = c(0,0.01)
)

1.2.1.2 Gamma distribution

nLL.gamma <- function(shape, scale){
  fs <- dgamma(
    x = d1$ht
    ,shape = shape
    ,scale = scale
    ,log = TRUE
  )
  -sum(fs)
}

fit.gamma <- mle(
  nLL.gamma
  ,start = list(shape = 1, scale = 1)
  ,method = "L-BFGS-B"
  ,lower = c(0,0.01)
)

1.2.1.3 Weibull distribution

nLL.weibull <- function(shape, scale){
  fs <- dweibull(
    x = d1$ht
    ,shape = shape
    ,scale = scale
    ,log = TRUE
  )
  -sum(fs)
}

fit.weibull <- mle(
  nLL.weibull
  ,start = list(shape = 1, scale = 1)
  ,method = "L-BFGS-B"
  ,lower = c(0,0.01)
)

1.2.2 Overlay estimated pdf onto histogram

1.2.2.1 Normal distribution

hist(d1$ht,breaks = 100, freq = FALSE,,main = "Overlay estimated pdf onto histogram of Normal distribution")
curve(dnorm(x,mean = coef(fit)[1],sd =coef(fit)[2]),col = "red" ,add = TRUE, lwd = 4)

1.2.2.2 Gammal distribution

hist(d1$ht,breaks = 100, freq = FALSE,,main = "Overlay estimated pdf onto histogram of Gamma distribution")
curve(dgamma(x,shape = coef(fit.gamma)[1],scale =coef(fit.gamma)[2]),col = "blue" ,add = TRUE, lwd = 4)

1.2.2.3 Weibull distribution

hist(d1$ht,breaks = 100, freq = FALSE,main = "Overlay estimated pdf onto histogram of Weibull distribution")
curve(dweibull(x,shape = coef(fit.weibull)[1],scale =coef(fit.weibull)[2]),col = "green" ,add = TRUE, lwd = 4)

1.2.3 Overlay estimated CDF onto eCDF

1.2.3.1 Normal distribution

plot(ecdf(d1$ht),,main = "Overlay estimated CDF onto eCDF of normal distribution")
curve(pnorm(x,mean = coef(fit)[1],sd =coef(fit)[2]),col = "red" ,add = TRUE, lwd = 4)

1.2.3.2 Gamma distribution

plot(ecdf(d1$ht),,main = "Overlay estimated CDF onto eCDF of gamma distribution")
curve(pgamma(x,shape = coef(fit.gamma)[1],scale =coef(fit.gamma)[2]),col = "blue" ,add = TRUE, lwd = 4)

1.2.3.3 Weibull distribution

plot(ecdf(d1$ht),,main = "Overlay estimated CDF onto eCDF of weibull distribution")
curve(pweibull(x,shape = coef(fit.weibull)[1],scale =coef(fit.weibull)[2]),col = "green" ,add = TRUE, lwd = 4)

1.2.4 QQ plot (sample vs estimated dist)

1.2.4.1 Normal distribution

qs <- seq(0.05,0.95,length = 100)
samp_qs <- quantile(d1$ht,qs)
theor_qs <- qnorm(qs,mean = coef(fit)[1],sd =coef(fit)[2])
plot(samp_qs,theor_qs)
abline(0,1)

1.2.4.2 Gamma distribution

qs <- seq(0.05,0.95,length = 100)
samp_qs.gamma <- quantile(d1$ht,qs)
theor_qs.gamma <- qgamma(qs,shape = coef(fit.gamma)[1],scale =coef(fit.gamma)[2])
plot(samp_qs.gamma,theor_qs.gamma)
abline(0,1)

1.2.4.3 Weibull distribution

qs <- seq(0.05,0.95,length = 100)
samp_qs.w <- quantile(d1$ht,qs)
theor_qs.w <- qweibull(qs,shape = coef(fit.weibull)[1],scale =coef(fit.weibull)[2])
plot(samp_qs.w,theor_qs.w)
abline(0,1)

1.2.5 Estimated Median

1.2.5.1 Normal distribution

qnorm(0.5,mean = coef(fit)[1],sd = coef(fit)[2])
## [1] 160.7419

1.2.5.2 Gamma distribution

qgamma(0.5,shape = coef(fit.gamma)[1],scale =coef(fit.gamma)[2])
## [1] 160.6312

1.2.5.3 Weibull distribution

qweibull(0.5,shape = coef(fit.weibull)[1],scale =coef(fit.weibull)[2])
## [1] 161.5156

1.2.6 Median Samp Dist (hist)

1.2.6.1 Normal distribution

M <- 500
N <- 1000
out <- rnorm(N*M,mean = coef(fit)[1],sd =coef(fit)[2])  %>%  array(dim = c(M,N))

samp_dist <- apply(out,1,median)
hist(samp_dist,breaks = 100,,main = "Median Samp Dist (hist) of normal distribution")

1.2.6.2 Gamma distribution

M <- 500
N <- 1000
out <- rgamma(N*M,shape = coef(fit.gamma)[1],scale =coef(fit.gamma)[2])  %>%  array(dim = c(M,N))

samp_dist.g <- apply(out,1,median)
hist(samp_dist.g,breaks = 100,main = "Median Samp Dist (hist) of gamma distribution")

1.2.6.3 Weibull distribution

M <- 500
N <- 1000
out <- rweibull(N*M,shape = coef(fit.weibull)[1],scale =coef(fit.weibull)[2])  %>%  array(dim = c(M,N))

samp_dist.w <- apply(out,1,median)
hist(samp_dist.w,breaks = 100,main = "Median Samp Dist (hist) of weibull distribution")

### Range of middle 95% of Samp Dist

diff(quantile(samp_dist,c(0.05/2,1-0.05/2)))
##    97.5% 
## 1.112256
diff(quantile(samp_dist.g,c(0.05/2,1-0.05/2)))
##    97.5% 
## 1.130482
diff(quantile(samp_dist.w,c(0.05/2,1-0.05/2)))
##    97.5% 
## 1.354879
  • Range of middle 95% of Samp Dist of gamma is 1.1304816, norm is 1.1122563, and the weibull is 1.3548788

2 gt

2.1 MM

2.1.1 Estimates of parameters

#gamma
mm.gamma.shape.g = mean(d1$gh)^2/var(d1$gh)
mm.gamma.scale.g=var(d1$gh)/mean(d1$gh)
#normal
mm.norm.mean.g=mean(d1$gh)
mm.norm.sd.g=sd(d1$gh)
#weibull
mean.weib.g = function(lambda, k){
  lambda*gamma(1 + 1/k)
}

lambda.g = function(samp.mean, k){
  samp.mean/gamma(1+1/k)
}

var.weib.g = function(samp.mean, k,samp.var){
  (lambda(samp.mean, k))^2*(gamma(1+2/k)-(gamma(1+1/k))^2)-samp.var
}

mm.opt.g = optimize(f = function(x){abs(var.weib(k = x, samp.mean = mean(d1$gh), samp.var = var(d1$gh)))},lower = 10, upper = 100)
mm.weib.k.g = mm.opt.g$minimum
mm.weib.lambda.g= lambda(samp.mean = mean(d1$gh), k=mm.weib.k)

2.1.2 Overlay estimated pdf onto histogram

hist(d1$gh, main = "Height of Adult Females: MM", breaks = 100, freq = FALSE,xlim = c(3,12))
curve(dgamma(x, shape = mm.gamma.shape.g, scale = mm.gamma.scale.g), add = TRUE, col = "red")
curve(dnorm(x, mm.norm.mean.g, mm.norm.sd.g), add = TRUE, col = "blue", lwd = 2)
curve(dweibull(x, shape = mm.weib.k.g, scale = mm.weib.lambda.g),add = TRUE, col = "green",lwd = 2)

  • the red line is gamma, the blue line is norm and the green line is weibull

2.1.3 Overlay estimated CDF onto eCDF

plot(ecdf(d1$gh),main = "The CDF and ECDF of ht: MM")
curve(pgamma(x, shape = mm.gamma.shape.g, scale = mm.gamma.scale.g), add = TRUE, col = "red",lwd = 6)
curve(pnorm(x, mm.norm.mean.g, mm.norm.sd.g), add = TRUE, col = "blue", lwd = 4)
curve(pweibull(x, shape = mm.weib.k.g, scale = mm.weib.lambda.g),add = TRUE, col = "green",lwd = 4)

  • the red line is gamma, the blue line is norm and the green line is weibull

2.1.4 QQ plot (sample vs estimated dist)

p = ppoints(300)
y.g = quantile(d1$gh, probs = p)
#gamma
x.gamma.mm.g = qgamma(p, shape = mm.gamma.shape.g, scale = mm.gamma.scale.g)
plot(x.gamma.mm.g,y.g,main = "Gamma QQplot")
abline(0,1)

#norm
x.norm.mm.g = qnorm(p, mm.norm.mean.g, mm.norm.sd.g)
plot(x.norm.mm.g,y.g,main = "Norm QQplot")
abline(0,1)

#weibull
x.pwei.mm.g=qweibull(p, shape = mm.weib.k.g, scale = mm.weib.lambda.g)
plot(x.pwei.mm.g,y.g,main = "Weibull QQplot")
abline(0,1)

2.1.5 Estimated Median

median(d1$gh)
## [1] 5.5
qweibull(0.5, shape = mm.weib.k.g, scale = mm.weib.lambda.g)
## [1] 5.629781
qgamma(0.5, shape = mm.gamma.shape.g, scale = mm.gamma.scale.g)
## [1] 5.660259
qnorm(0.5, mm.norm.mean.g, mm.norm.sd.g)
## [1] 5.7246
  • we could know that the median of ht is 160.8 , and the norm median is 5.7246, the gamma median is 5.6602591, and the qweibull median is 5.6297806

2.1.6 Median Samp Dist (hist)

#gamma
med.gam.g = NA
for (i in 1:1000) {
  data = rgamma(200, shape = mm.gamma.shape.g, scale = mm.gamma.scale.g)
  med.gam.g[i] = median(data)
}
hist(med.gam.g,breaks = 100,main = "Histogram of median of gamma distribution")

#norm
med.norm.g = NA
for (i in 1:1000) {
  data = rnorm(200, mm.norm.mean.g, mm.norm.sd.g)
  med.norm.g[i] = median(data)
}
hist(med.norm.g,breaks = 100,main = "Histogram of median of normal distribution")

#
med.wei.g = NA
for (i in 1:1000) {
  data = rweibull(200, shape = mm.weib.k.g, scale = mm.weib.lambda.g)
  med.wei.g[i] = median(data)
}
hist(med.wei.g,breaks = 100,main = "Histogram of median of weibull distribution")

2.1.7 Range of middle 95% of Samp Dist

diff(quantile(med.gam.g,probs = c(0.025,0.975)))
##     97.5% 
## 0.3624233
diff(quantile(med.norm.g,probs = c(0.025,0.975)))
##     97.5% 
## 0.3917406
diff(quantile(med.wei.g,probs = c(0.025,0.975)))
##     97.5% 
## 0.2109234
  • Range of middle 95% of Samp Dist of gamma is 0.3624233, norm is 0.3917406, and the weibull is 0.2109234

2.2 MLE

2.2.1 Estimates of parameters

2.2.1.1 Normal distribution

library(stats4)
nLL.g <- function(mean, sd){
  fs <- dnorm(
    x = d1$gh
    ,mean = mean
    ,sd = sd
    ,log = TRUE
  )
  -sum(fs)
}

fit.g <- mle(
  nLL.g
  ,start = list(mean = 1, sd = 1)
  ,method = "L-BFGS-B"
  ,lower = c(0,0.01)
)

2.2.1.2 Gamma distribution

nLL.gamma.g <- function(shape, scale){
  fs <- dgamma(
    x = d1$gh
    ,shape = shape
    ,scale = scale
    ,log = TRUE
  )
  -sum(fs)
}

fit.gamma.g <- mle(
  nLL.gamma.g
  ,start = list(shape = 1, scale = 1)
  ,method = "L-BFGS-B"
  ,lower = c(0,0.01)
)

2.2.1.3 Weibull distribution

nLL.weibull.g <- function(shape, scale){
  fs <- dweibull(
    x = d1$gh
    ,shape = shape
    ,scale = scale
    ,log = TRUE
  )
  -sum(fs)
}

fit.weibull.g <- mle(
  nLL.weibull.g
  ,start = list(shape = 1, scale = 1)
  ,method = "L-BFGS-B"
  ,lower = c(0,0.01)
)

2.2.2 Overlay estimated pdf onto histogram

2.2.2.1 Normal distribution

hist(d1$gh,breaks = 100, freq = FALSE,xlim = c(4,12),main = "Overlay estimated pdf onto histogram of Normal distribution")
curve(dnorm(x,mean = coef(fit.g)[1],sd =coef(fit.g)[2]),col = "red" ,add = TRUE, lwd = 4)

2.2.2.2 Gammal distribution

hist(d1$gh,breaks = 100, freq = FALSE,,main = "Overlay estimated pdf onto histogram of Gamma distribution")
curve(dgamma(x,shape = coef(fit.gamma.g)[1],scale =coef(fit.gamma.g)[2]),col = "blue" ,add = TRUE, lwd = 4)

2.2.2.3 Weibull distribution

hist(d1$gh,breaks = 100, freq = FALSE,main = "Overlay estimated pdf onto histogram of Weibull distribution")
curve(dweibull(x,shape = coef(fit.weibull.g)[1],scale =coef(fit.weibull.g)[2]),col = "green" ,add = TRUE, lwd = 4)

2.2.3 Overlay estimated CDF onto eCDF

2.2.3.1 Normal distribution

plot(ecdf(d1$gh),,main = "Overlay estimated CDF onto eCDF of normal distribution")
curve(pnorm(x,mean = coef(fit.g)[1],sd =coef(fit.g)[2]),col = "red" ,add = TRUE, lwd = 4)

2.2.3.2 Gamma distribution

plot(ecdf(d1$gh),,main = "Overlay estimated CDF onto eCDF of gamma distribution")
curve(pgamma(x,shape = coef(fit.gamma.g)[1],scale =coef(fit.gamma.g)[2]),col = "blue" ,add = TRUE, lwd = 4)

2.2.3.3 Weibull distribution

plot(ecdf(d1$gh),,main = "Overlay estimated CDF onto eCDF of weibull distribution")
curve(pweibull(x,shape = coef(fit.weibull.g)[1],scale =coef(fit.weibull.g)[2]),col = "green" ,add = TRUE, lwd = 4)

2.2.4 QQ plot (sample vs estimated dist)

2.2.4.1 Normal distribution

qs <- seq(0.05,0.95,length = 100)
samp_qs.g <- quantile(d1$gh,qs)
theor_qs.g <- qnorm(qs,mean = coef(fit.g)[1],sd =coef(fit.g)[2])
plot(samp_qs.g,theor_qs.g)
abline(0,1)

2.2.4.2 Gamma distribution

qs <- seq(0.05,0.95,length = 100)
samp_qs.gamma.g <- quantile(d1$gh,qs)
theor_qs.gamma.g <- qgamma(qs,shape = coef(fit.gamma.g)[1],scale =coef(fit.gamma.g)[2])
plot(samp_qs.gamma.g,theor_qs.gamma.g)
abline(0,1)

2.2.4.3 Weibull distribution

qs <- seq(0.05,0.95,length = 100)
samp_qs.w.g <- quantile(d1$ht,qs)
theor_qs.w.g <- qweibull(qs,shape = coef(fit.weibull.g)[1],scale =coef(fit.weibull.g)[2])
plot(samp_qs.w.g,theor_qs.w.g)
abline(0,1)

2.2.5 Estimated Median

2.2.5.1 Normal distribution

qnorm(0.5,mean = coef(fit.g)[1],sd = coef(fit.g)[2])
## [1] 5.7246

2.2.5.2 Gamma distribution

qgamma(0.5,shape = coef(fit.gamma.g)[1],scale =coef(fit.gamma.g)[2])
## [1] 5.677983

2.2.5.3 Weibull distribution

qweibull(0.5,shape = coef(fit.weibull.g)[1],scale =coef(fit.weibull.g)[2])
## [1] 5.64902

2.2.6 Median Samp Dist (hist)

2.2.6.1 Normal distribution

M <- 500
N <- 1000
out.g <- rnorm(N*M,mean = coef(fit.g)[1],sd =coef(fit.g)[2])  %>%  array(dim = c(M,N))

samp_dist.g.g <- apply(out.g,1,median)
hist(samp_dist.g.g,breaks = 100,,main = "Median Samp Dist (hist) of normal distribution")

2.2.6.2 Gamma distribution

M <- 500
N <- 1000
out.gamma.g <- rgamma(N*M,shape = coef(fit.gamma.g)[1],scale =coef(fit.gamma.g)[2])  %>%  array(dim = c(M,N))

samp_dist.gamma.g <- apply(out.gamma.g,1,median)
hist(samp_dist.gamma.g,breaks = 100,main = "Median Samp Dist (hist) of gamma distribution")

2.2.6.3 Weibull distribution

M <- 500
N <- 1000
out.w.g <- rweibull(N*M,shape = coef(fit.weibull.g)[1],scale =coef(fit.weibull.g)[2])  %>%  array(dim = c(M,N))

samp_dist.w.g <- apply(out.w.g,1,median)
hist(samp_dist.w.g,breaks = 100,main = "Median Samp Dist (hist) of weibull distribution")

2.2.7 Range of middle 95% of Samp Dist

diff(quantile(samp_dist.g.g,c(0.05/2,1-0.05/2)))
##     97.5% 
## 0.1600192
diff(quantile(samp_dist.gamma.g,c(0.05/2,1-0.05/2)))
##    97.5% 
## 0.130245
diff(quantile(samp_dist.w.g,c(0.05/2,1-0.05/2)))
##     97.5% 
## 0.2410041
  • Range of middle 95% of Samp Dist of gamma is 0.130245, norm is 0.1600192, and the weibull is 0.2410041